** NOTE: THE CODE BELOW IS FROM DICKERSON, HOLE AND MUNFORD (2011)
* source: http://www.stata.com/meeting/uk11/abstracts/UK11_Hole.pdf

****************************************************
**************** Columns 1 & 2 *********************
****************************************************
* Start from a clean session, silence -more- prompts once, and load the data.
clear all
set more off
set matsize 800
use /Users/yasenov/Dropbox/data_dta/master.dta, clear

* Build string identifiers by concatenation. id, grade and year are converted
* to strings for the concatenation and converted back to numeric afterwards.
tostring id grade year, replace
gen course = grade + " " + subject + " " + cohort
gen class = year + " " + grade + " " + subject + " " + cohort
gen studentxclass = id + " " + class
gen gradebook = year + grade + cohort
destring id grade year, replace

* The specification below replicates table 3 exactly.
* Alternative considered: adding i.subject i.grade i.cohort to the controls;
* the results do not change qualitatively.
global controls male bulg age month num_month order_ass semester stem

* Columns 1 and 2: ordered probit, then ordered logit, each with standard
* errors clustered on id and followed by the marginal effect of late_start
* evaluated at the means of the covariates.
foreach estimator in oprobit ologit {
	eststo: xi: `estimator' marks late_start $controls, vce(cluster id)
	margins, dydx(late_start) atmeans
}

***********************************************
**************** Column 3 *********************
***********************************************
* source: http://www.stata.com/meeting/uk11/abstracts/UK11_Hole.pdf
* see http://www.stata.com/manuals13/rclogit.pdf for clogit manual
* in clogit: Specifying vce(robust) is equivalent to specifying vce(cluster groupvar),
* where groupvar is the variable for the matched groups
* Greene ch21 has good explanations

clear all
use C:\Users\yasenov\Dropbox\data_dta\master.dta
gen month3 = month*month*month // cubic in month; not used by the specification below

global y marks
global x late_start num_month order_ass

** IMPORTANT: THIS CHOOSES THE FIXED EFFECT
global id studentxclass // name of the variable that defines the fixed-effect groups

* The data were just reloaded, so a studentxclass created earlier in the
* session is gone. If the saved dataset does not carry it, rebuild it here as a
* numeric group id over the same components used to define it
* (id, year, grade, subject, cohort). The -missing- option keeps observations
* with a missing component in a group of their own, as string concatenation would.
capture confirm variable studentxclass, exact
if (_rc != 0) {
	egen long studentxclass = group(id year grade subject cohort), missing
}

* Mark the estimation sample: complete cases on outcome, regressors and group id.
* -strok- is required because the group id may be a string variable; without it
* -markout- sets the marker to 0 in every observation.
tempvar touse
mark `touse'
markout `touse' $y $x $id, strok

* Run clogit for each cutoff and combine using suest
tempvar esample
gen byte `esample' = 0
tempname BMAT
local suest // names of the stored per-cutoff results, in cutoff order
set more off
forvalues i = 3(1)6 {
	tempvar y`i'
	* Dichotomize the outcome at cutoff `i'. Restricting to `touse' matters:
	* a missing $y compares as larger than any number and would otherwise
	* be coded 1 and enter the estimation.
	gen byte `y`i'' = $y >= `i' if `touse'
	clogit `y`i'' $x if `touse', group($id)
	* The final estimation sample is the union of the per-cutoff samples.
	replace `esample' = 1 if e(sample)
	estimates store `y`i''
	* Stack the coefficient vectors side by side (1 x k per cutoff). The first
	* cutoff initializes the matrix explicitly, so a genuine matrix error is
	* reported instead of being swallowed by -capture-.
	if ("`suest'" == "") matrix `BMAT' = e(b)
	else matrix `BMAT' = `BMAT', e(b)
	local suest `suest' `y`i''
}
* Joint (co)variance matrix of all per-cutoff estimates.
qui suest `suest'

* Calculate Das and Van Soest estimates: a minimum-distance combination of the
* per-cutoff coefficient vectors stacked in `BMAT', weighted by the inverse of
* their joint covariance matrix taken from e(V).
*   A    = H, stacked identity matrices mapping the common k coefficients to
*          each cutoff's copy
*   VMAT = Sigma, the joint covariance matrix in e(V)
*   COV  = (H' Sigma^-1 H)^-1, covariance of the combined estimator
*   B    = (COV * H' Sigma^-1 * b_stacked)', the DvS estimates
tempname VMAT A B COV
local k : word count $x
* Number of cutoffs, derived from the stacked coefficient vector instead of
* being hard-coded, so it stays in sync with the cutoff loop.
local ncut = colsof(`BMAT')/`k'
matrix `VMAT' = e(V)
matrix `A' = J(`ncut',1,1)#I(`k')
matrix `COV' = invsym(`A''*invsym(`VMAT')*`A')
matrix `B' = (`COV'*`A''*invsym(`VMAT')*`BMAT'')'

* Tidy up matrix names and present results
matrix colnames `B' = $x
matrix coleq `B' = :
matrix colnames `COV' = $x
matrix coleq `COV' = :
matrix rownames `COV' = $x
matrix roweq `COV' = :

* Number of observations used by at least one of the per-cutoff regressions.
count if `esample'
local obs = r(N)
* Post the combined estimates as the current estimation results. esample()
* turns `esample' into e(sample); the temporary variable no longer exists afterwards.
ereturn post `B' `COV', depname($y) obs(`obs') esample(`esample')
ereturn display

* Calculate the number of individuals (distinct values of $id) in the
* estimation sample. -egen tag()- flags exactly one observation per group among
* those satisfying the -if- condition, so a group is counted even when its last
* observation in the data lies outside e(sample).
tempvar last
egen byte `last' = tag($id) if e(sample)
count if `last' == 1
